# LIBRARIES
library(tidyverse)
library(plotly)
library(data.table)
library(ggplot2)
library(maps)
library(dplyr)
library(tidyr)
library(lubridate)
# COVID-19 dataset
# Download us-states.csv from https://github.com/nytimes/covid-19-data/. See the README.md in that repository for details on the file contents.
# Read the state-level CSV from the working directory into a data.table.
data1 <- fread(input = "us-states.csv")
# Preview the first rows to check the columns that were read.
head(x = data1)
## date state fips cases deaths
## 1: 2020-01-21 Washington 53 1 0
## 2: 2020-01-22 Washington 53 1 0
## 3: 2020-01-23 Washington 53 1 0
## 4: 2020-01-24 Illinois 17 1 0
## 5: 2020-01-24 Washington 53 1 0
## 6: 2020-01-25 California 6 1 0
# Make sure `date` is a Date so it can be formatted by month and compared.
data1$date <- as.Date(data1$date)

# Collapse the daily rows to one row per state and calendar month.
# The source counts are treated as running totals: the month's maximum is kept
# as the cumulative figure, and `date` is the earliest date seen in that month.
data_us <- data1 %>%
  group_by(state, year_month = format(date, "%Y-%m")) %>%
  summarise(
    fips = max(fips),
    cases_cum = max(cases),
    deaths_cum = max(deaths),
    date = min(date),
    # Keep the result grouped by state (and say so explicitly) so that the
    # lag() below is computed within each state, never across states.
    .groups = "drop_last"
  ) %>%
  # New cases in a month = this month's cumulative total minus the previous
  # month's; a state's first month is compared against 0. Rows come out of
  # summarise() ordered by year_month within state, which lag() relies on.
  mutate(cases = cases_cum - lag(cases_cum, default = 0))
data_us
## # A tibble: 1,732 × 7
## # Groups: state [56]
## state year_month fips cases_cum deaths_cum date cases
## <chr> <chr> <int> <int> <int> <date> <int>
## 1 Alabama 2020-03 1 999 14 2020-03-13 999
## 2 Alabama 2020-04 1 7068 272 2020-04-01 6069
## 3 Alabama 2020-05 1 17952 630 2020-05-01 10884
## 4 Alabama 2020-06 1 38045 950 2020-06-01 20093
## 5 Alabama 2020-07 1 87723 1580 2020-07-01 49678
## 6 Alabama 2020-08 1 126058 2182 2020-08-01 38335
## 7 Alabama 2020-09 1 154701 2540 2020-09-01 28643
## 8 Alabama 2020-10 1 192285 2967 2020-10-01 37584
## 9 Alabama 2020-11 1 249524 3578 2020-11-01 57239
## 10 Alabama 2020-12 1 361226 4827 2020-12-01 111702
## # ℹ 1,722 more rows
# Interactive chart of monthly new cases with one coloured trace per state.
# A scatter trace's `mode` must be built from "lines", "markers" and "text"
# (joined with "+") or be "none"; "Path" is not a valid value, so lines are
# requested explicitly here.
state_plot <- data_us %>%
  plot_ly(
    x = ~year_month,
    y = ~cases,
    color = ~state,
    type = "scatter",
    mode = "lines"
  )
state_plot
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Monthly rows for New York only.
ny_data <- data_us %>%
  filter(state == "New York")

# Scatter plot of New York's monthly new cases. The mode is stated explicitly
# as "markers" so plotly does not have to guess it for the trace.
ny_scatter.plot <- ny_data %>%
  plot_ly(
    x = ~year_month,
    y = ~cases,
    type = "scatter",
    mode = "markers"
  )
ny_scatter.plot
# Pick the New York row with the largest monthly case count; which.max()
# returns the first position of the maximum, so exactly one row is selected
# when the data is non-empty.
highest_cases <- ny_data[with(ny_data, which.max(cases)), ]
highest_cases
## # A tibble: 1 × 7
## # Groups: state [1]
## state year_month fips cases_cum deaths_cum date cases
## <chr> <chr> <int> <int> <int> <date> <int>
## 1 New York 2022-01 36 4789532 64247 2022-01-01 1315562
# Geo layout options passed to layout(geo = ...) by the maps below:
# restrict the map to the USA, use the Albers USA projection, draw lakes white.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  lakecolor = toRGB("white")
)
# Keep only rows whose state appears in R's built-in `state.name` vector and
# attach the matching two-letter code from `state.abb`, which the choropleth
# uses as its location key. Any other value of `state` is dropped here.
us_data_filtered <- data_us %>%
  filter(state %in% state.name) %>%
  mutate(state_short.name = state.abb[match(state, state.name)])

# One row per state: the largest monthly new-case count that state recorded.
# The summary is returned ungrouped; the map does not need any grouping.
dummy <- us_data_filtered %>%
  group_by(state, state_short.name) %>%
  summarise(cases = max(cases), .groups = "drop")

# Choropleth of each state's peak monthly cases; span = I(0) sets the
# state-border line width to 0.
plot_geo(data = dummy) %>%
  add_trace(
    z = ~cases,
    text = ~state,
    span = I(0),
    locations = ~state_short.name,
    locationmode = "USA-states"
  ) %>%
  layout(geo = g)
# One row per state and month with that month's new cases. The grouping keys
# already identify a single row each, so max() simply carries the value over
# under the name `new_cases`. The result is returned ungrouped.
dummy <- us_data_filtered %>%
  group_by(state, state_short.name, year_month) %>%
  summarise(new_cases = max(cases), .groups = "drop")

# Animated choropleth: one frame per year_month, coloured by new cases.
plot_geo(data = dummy) %>%
  add_trace(
    z = ~new_cases,
    text = ~state,
    span = I(0),
    locations = ~state_short.name,
    locationmode = "USA-states",
    frame = ~year_month
  ) %>%
  layout(geo = g)